## Import packages
library("dplyr")
library("Seurat") # scRNA-seq analysis
library("patchwork")
if (!"SeuratWrappers" %in% installed.packages()) remotes::install_github("satijalab/seurat-wrappers", "seurat5")
library("SeuratWrappers")
if (!"batchelor" %in% installed.packages()) BiocManager::install("batchelor")
if ((!"matrixStats" %in% installed.packages()) | (packageVersion("matrixStats")>"1.1.0")) {
remotes::install_version("matrixStats", version="1.1.0")
}
if (!"ComplexHeatmap" %in% installed.packages()) remotes::install_github("jokergoo/ComplexHeatmap")
if (!"zellkonverter" %in% installed.packages()) BiocManager::install("zellkonverter")
(7 min)
# Import data
data.dir <- "../data"
seu <- readRDS(file = file.path(data.dir, "jurkat.rds"))
You may want to down sample this data set depending on the amount of
RAM memory you have. The jurkat data set has 8,664
cells.
## Downsample data set
downsample <- TRUE # replace to FALSE in case you don't want to down sample
prop.down <- 0.4 # proportion of cells to down sample per batch: 40% of the cells
if (downsample) {
no.cells.batch <- ceiling(table(seu$batch) * 0.4) # CTRL = 1310 and STIM = 1491
cell.idx.batch <- split(x = colnames(seu), f = seu$batch) # split into a list the cell names per batch
cell.idx.batch.down <- lapply(X = setNames(names(cell.idx.batch), names(cell.idx.batch)), FUN = function(x) {
set.seed(123)
sample(x = cell.idx.batch[[x]], size = no.cells.batch[[x]], replace = FALSE)
}) # downsample each batch cell names
cell.idx.downsample <- do.call(c, cell.idx.batch.down) # join cell name labels from the two batches into one character vector
seu <- subset(seu, cells = cell.idx.downsample)
}
gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 3293228 175.9 4808767 256.9 4808767 256.9
## Vcells 23036169 175.8 81939233 625.2 84635607 645.8
(7 min)
## Joint analysis
# Standard Seurat upstream workflow
seu <- NormalizeData(seu)
seu <- FindVariableFeatures(seu)
seu <- ScaleData(seu)
seu <- RunPCA(seu)
seu <- RunUMAP(seu, dims = 1:30, reduction = "pca", reduction.name = "umap.unintegrated")
## Plot jointly dimreds
pca.unint <- DimPlot(seu, reduction = "pca", group.by = "batch")
umap.unint <- DimPlot(seu, reduction = "umap.unintegrated", group.by = "batch")
pca.unint + umap.unint
## Joint celltype markers
# List of jurkat and T293 cell lines
markers.plot <- list(
"jurkat" = "CD3D",
"t293" = "XIST"
)
# Plot
jurkat.markers.unint.plot <- FeaturePlot(seu, features = markers.plot$jurkat, split.by = "batch",
max.cutoff = 3, cols = c("grey", "red"),
reduction = "umap.unintegrated", ncol = 4,
pt.size = 0.5)
t293.markers.unint.plot <- FeaturePlot(seu, features = markers.plot$t293, split.by = "batch",
max.cutoff = 3, cols = c("grey", "red"),
reduction = "umap.unintegrated", ncol = 4,
pt.size = 0.5)
## Plot jointly celltype markers
# Print
jurkat.markers.unint.plot
t293.markers.unint.plot
## Independent sample analysis
# Split Seurat object into two batch on 'batch' label identity
seu.list <- SplitObject(object = seu, split.by = "batch")
# Standard Seurat upstream workflow
seu.list <- lapply(X = seu.list, FUN = function(x) {
x <- NormalizeData(x)
x <- FindVariableFeatures(x)
x <- ScaleData(x)
x <- RunPCA(x)
x <- FindNeighbors(x, dims = 1:15, reduction = "pca")
x <- FindClusters(x, resolution = 0.1, cluster.name = "unintegrated_clusters")
x <- RunUMAP(x, dims = 1:15, reduction = "pca", reduction.name = "umap.unintegrated")
})
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1142
## Number of edges: 45157
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9000
## Number of communities: 1
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1303
## Number of edges: 48196
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9000
## Number of communities: 1
## Elapsed time: 0 seconds
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 1022
## Number of edges: 40793
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9496
## Number of communities: 2
## Elapsed time: 0 seconds
## Plot independent sample analysis clusters
umap.ind.samp.unint <- lapply(X = seu.list, FUN = function(x) {
DimPlot(x, reduction = "umap.unintegrated", group.by = "unintegrated_clusters", pt.size = 0.5)
})
umap.ind.samp.unint$t293 + umap.ind.samp.unint$jurkat + umap.ind.samp.unint$`jurkat_t293_50:50`
(7 min)
## Perform integration
# Split layers for integration
seu[["RNA"]] <- split(x = seu[["RNA"]], f = seu$batch)
# Standard workflow
seu <- NormalizeData(seu)
seu <- FindVariableFeatures(seu)
seu <- ScaleData(seu)
seu <- RunPCA(seu)
# Integrate layers
int.methods <- c("CCA" = "CCAIntegration", "RPCA" = "RPCAIntegration",
"Harmony" = "HarmonyIntegration", "FastMNN" = "FastMNNIntegration",
"scVI" = "scVIIntegration")
for (m in names(int.methods)[1:4]) {
cat("\nRunning integration method", m, "...\n")
int.dimred <- paste0("integrated.", m)
umap.dimred <- paste0("umap.", m)
# Integration
if (m=="scVI") {
seu <- IntegrateLayers(object = seu, method = get(eval(substitute(int.methods[m]))),
orig.reduction = "pca",
new.reduction = int.dimred,
conda_env = "/home/aggs/miniconda3/envs/scvi-env", # substitute this by your installation
verbose = TRUE)
} else {
seu <- IntegrateLayers(object = seu, method = get(eval(substitute(int.methods[m]))),
orig.reduction = "pca",
new.reduction = int.dimred,
verbose = TRUE)
}
}
##
## Running integration method CCA ...
##
## Running integration method RPCA ...
##
## Running integration method Harmony ...
##
## Running integration method FastMNN ...
# Re-join layers after integration
seu[["RNA"]] <- JoinLayers(seu[["RNA"]])
# Run UMAP for every integration method
int.umaps.plots <- list()
for (m in names(int.methods)[1:4]) {
cat("\nRunning UMAP for", m, "integrated result...\n")
int.dimred <- paste0("integrated.", m)
umap.dimred <- paste0("umap.", m)
seu <- RunUMAP(seu, dims = 1:30, reduction = int.dimred, reduction.name = umap.dimred)
int.umaps.plots[[m]] <- DimPlot(object = seu, reduction = umap.dimred, group.by = c("batch", "cell_type"),
combine = FALSE, label.size = 2)
}
##
## Running UMAP for CCA integrated result...
##
## Running UMAP for RPCA integrated result...
##
## Running UMAP for Harmony integrated result...
##
## Running UMAP for FastMNN integrated result...
(7 min)
## Assess integration by printing the plots using the "batch" and "cell_type" (ground-truth) labels
wrap_plots(c(int.umaps.plots$CCA, int.umaps.plots$RPCA, int.umaps.plots$Harmony, int.umaps.plots$FastMNN),
ncol = 2, byrow = TRUE)
## R packages and versions used in these analyses
sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_GB.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=fi_FI.UTF-8 LC_COLLATE=en_GB.UTF-8
## [5] LC_MONETARY=fi_FI.UTF-8 LC_MESSAGES=en_GB.UTF-8
## [7] LC_PAPER=fi_FI.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=fi_FI.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] SeuratWrappers_0.3.2 patchwork_1.1.2 Seurat_5.1.0
## [4] SeuratObject_5.0.2 sp_1.5-1 dplyr_1.1.4
##
## loaded via a namespace (and not attached):
## [1] spam_2.9-1 plyr_1.8.7
## [3] igraph_1.3.5 lazyeval_0.2.2
## [5] splines_4.1.0 RcppHNSW_0.4.1
## [7] BiocParallel_1.28.3 listenv_0.8.0
## [9] scattermore_1.2 GenomeInfoDb_1.30.1
## [11] ggplot2_3.4.0 digest_0.6.30
## [13] htmltools_0.5.5 fansi_1.0.3
## [15] magrittr_2.0.3 ScaledMatrix_1.2.0
## [17] tensor_1.5 cluster_2.1.4
## [19] ROCR_1.0-11 remotes_2.4.2
## [21] globals_0.16.2 matrixStats_1.1.0
## [23] R.utils_2.12.1 spatstat.sparse_3.0-0
## [25] colorspace_2.0-3 ggrepel_0.9.2
## [27] xfun_0.43 RCurl_1.98-1.9
## [29] jsonlite_1.8.4 progressr_0.11.0
## [31] spatstat.data_3.0-0 survival_3.4-0
## [33] zoo_1.8-11 glue_1.6.2
## [35] polyclip_1.10-4 gtable_0.3.1
## [37] zlibbioc_1.40.0 XVector_0.34.0
## [39] leiden_0.4.3 DelayedArray_0.20.0
## [41] BiocSingular_1.10.0 SingleCellExperiment_1.16.0
## [43] future.apply_1.10.0 BiocGenerics_0.40.0
## [45] abind_1.4-5 scales_1.3.0
## [47] spatstat.random_3.0-1 miniUI_0.1.1.1
## [49] Rcpp_1.0.9 viridisLite_0.4.1
## [51] xtable_1.8-4 reticulate_1.26
## [53] rsvd_1.0.5 dotCall64_1.0-2
## [55] ResidualMatrix_1.4.0 stats4_4.1.0
## [57] htmlwidgets_1.6.2 httr_1.4.5
## [59] RColorBrewer_1.1-3 ellipsis_0.3.2
## [61] ica_1.0-3 scuttle_1.4.0
## [63] pkgconfig_2.0.3 R.methodsS3_1.8.2
## [65] farver_2.1.1 sass_0.4.2
## [67] uwot_0.1.16 deldir_1.0-6
## [69] utf8_1.2.2 tidyselect_1.2.0
## [71] labeling_0.4.2 rlang_1.1.2
## [73] reshape2_1.4.4 later_1.3.0
## [75] munsell_0.5.0 tools_4.1.0
## [77] cachem_1.0.6 cli_3.6.1
## [79] generics_0.1.3 ggridges_0.5.4
## [81] batchelor_1.10.0 evaluate_0.17
## [83] stringr_1.5.0 fastmap_1.1.0
## [85] yaml_2.3.6 goftest_1.2-3
## [87] RhpcBLASctl_0.21-247.1 knitr_1.46
## [89] fitdistrplus_1.1-8 purrr_1.0.1
## [91] RANN_2.6.1 sparseMatrixStats_1.6.0
## [93] pbapply_1.6-0 future_1.29.0
## [95] nlme_3.1-160 mime_0.12
## [97] R.oo_1.25.0 compiler_4.1.0
## [99] plotly_4.10.1 png_0.1-7
## [101] spatstat.utils_3.0-1 tibble_3.2.1
## [103] bslib_0.4.0 stringi_1.7.8
## [105] highr_0.9 RSpectra_0.16-1
## [107] lattice_0.20-45 Matrix_1.6-5
## [109] vctrs_0.6.5 pillar_1.9.0
## [111] lifecycle_1.0.3 BiocManager_1.30.19
## [113] spatstat.geom_3.0-3 lmtest_0.9-40
## [115] jquerylib_0.1.4 RcppAnnoy_0.0.20
## [117] BiocNeighbors_1.12.0 bitops_1.0-7
## [119] data.table_1.14.10 cowplot_1.1.1
## [121] irlba_2.3.5.1 GenomicRanges_1.46.1
## [123] httpuv_1.6.6 R6_2.5.1
## [125] promises_1.2.0.1 KernSmooth_2.23-20
## [127] gridExtra_2.3 IRanges_2.28.0
## [129] parallelly_1.32.1 codetools_0.2-18
## [131] fastDummies_1.7.3 MASS_7.3-60
## [133] SummarizedExperiment_1.24.0 withr_2.5.0
## [135] sctransform_0.4.1 GenomeInfoDbData_1.2.7
## [137] harmony_1.2.0 S4Vectors_0.32.4
## [139] parallel_4.1.0 beachmat_2.10.0
## [141] grid_4.1.0 tidyr_1.3.0
## [143] DelayedMatrixStats_1.16.0 rmarkdown_2.26
## [145] MatrixGenerics_1.6.0 Rtsne_0.16
## [147] spatstat.explore_3.0-5 Biobase_2.54.0
## [149] shiny_1.7.3